Load Required Libraries
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
── Attaching packages ──────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──
✓ ggplot2 3.3.3 ✓ purrr 0.3.4
✓ tibble 3.1.2 ✓ dplyr 1.0.6
✓ tidyr 1.1.3 ✓ stringr 1.4.0
✓ readr 1.4.0 ✓ forcats 0.5.1
── Conflicts ─────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
library(gapminder)
Nest the DataFrame by country and continent
# Collapse gapminder to one row per (country, continent) pair; all remaining
# columns for that pair are packed into a list-column named `data`.
gapminder.nested <- gapminder %>%
  group_by(country, continent) %>%
  nest() %>%
  # nest() on a grouped data frame keeps the grouping, so drop it explicitly.
  # Left in place, every later mutate()/filter() on this object would run
  # once per single-row group and the grouping would leak into derived tables.
  ungroup()

# Print the nested tibble for inspection.
gapminder.nested
Fit a linear regression of gdpPercap on year.
# Pooled model: a single straight-line trend of GDP per capita against year,
# fitted to every row of gapminder at once (no per-country terms).
gapminder.lm <- lm(formula = gdpPercap ~ year, data = gapminder)

# Report the residual summary, coefficient table and R-squared of the pooled fit.
summary(gapminder.lm)
Call:
lm(formula = gdpPercap ~ year, data = gapminder)
Residuals:
Min 1Q Median 3Q Max
-10507 -5434 -2809 2071 109228
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -249693.71 26678.48 -9.359 <2e-16 ***
year 129.78 13.48 9.630 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 9602 on 1702 degrees of freedom
Multiple R-squared: 0.05167, Adjusted R-squared: 0.05112
F-statistic: 92.74 on 1 and 1702 DF, p-value: < 2.2e-16
Fit a regression for each country in your nested data.frame
# Fit a straight-line trend of GDP per capita over time to one data frame.
#
# Args:
#   df: a data frame with `gdpPercap` and `year` columns.
#
# Returns:
#   The fitted `lm` object for gdpPercap ~ year.
country_model <- function(df) {
  # The call is written with `data = df` so the stored model call reads the
  # same for every data frame the function is mapped over.
  fit <- lm(formula = gdpPercap ~ year, data = df)
  fit
}
# Attach two list-columns to the nested data:
#   model - the lm object country_model() returns for each row's `data`
#   coef  - the broom::tidy() coefficient table for each of those models
gapminder.model <- gapminder.nested %>%
  mutate(model = map(data, country_model)) %>%
  mutate(coef = map(model, broom::tidy))
Questions
Question 1
# Inspect the fitted model stored in the first row of gapminder.model.
gapminder.model[["model"]][[1]]
Call:
lm(formula = gdpPercap ~ year, data = df)
Coefficients:
(Intercept) year
1674.8134 -0.4406
# Average R-squared of the rows of glance_df within each continent,
# listed from the lowest average to the highest.
glance_df %>%
  select(country, continent, r.squared) %>%
  group_by(continent) %>%
  summarise(r.squared = mean(r.squared)) %>%
  arrange(r.squared)
Question 2
# Rows of glance_df whose R-squared is below 0.25, in alphabetical order
# by country.
glance_df %>%
  select(country, continent, r.squared) %>%
  filter(r.squared < 0.25) %>%
  arrange(country)
Question 3
# Smallest R-squared found in each continent, listed from the lowest
# minimum to the highest.
glance_df %>%
  select(country, continent, r.squared) %>%
  group_by(continent) %>%
  summarise(r.squared = min(r.squared)) %>%
  arrange(r.squared)
Question 4
# Look up the R-squared recorded for Australia.
glance_df %>%
  select(country, continent, r.squared) %>%
  filter(country == "Australia")
Question 5
# Jittered scatter of R-squared by continent, one point per row of glance_df.
# Points are coloured by continent; the jitter spreads them horizontally so
# overlapping values stay visible.
glance_df %>%
  ggplot(mapping = aes(x = continent, y = r.squared)) +
  geom_jitter(mapping = aes(color = continent))

LS0tCnRpdGxlOiAiQWN0aXZpdHkgMiIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKIyBMb2FkIFJlcXVpcmVkIExpYnJhcmllcwoKYGBge3J9CmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KGdhcG1pbmRlcikKYGBgCgojIE5lc3QgdGhlIERhdGFGcmFtZSBieSBjb3VudHJ5IGFuZCBjb250aW5lbnQKCmBgYHtyfQpnYXBtaW5kZXIubmVzdGVkIDwtIGdhcG1pbmRlciAlPiUKICBncm91cF9ieShjb3VudHJ5LCBjb250aW5lbnQpICU+JQogIG5lc3QoKQoKCmdhcG1pbmRlci5uZXN0ZWQKYGBgCgojIEZpdCBhIGxpbmVhciByZWdyZXNzaW9uIG9mIGdkcFBlcmNhcCBvbiB5ZWFyLgoKYGBge3J9CmdhcG1pbmRlci5sbSA8LSBsbShnZHBQZXJjYXAgfiB5ZWFyLCBkYXRhPWdhcG1pbmRlcikKc3VtbWFyeShnYXBtaW5kZXIubG0pCmBgYAoKIyBGaXQgYSByZWdyZXNzaW9uIGZvciBlYWNoIGNvdW50cnkgaW4geW91ciBuZXN0ZWQgZGF0YS5mcmFtZQoKYGBge3J9CmNvdW50cnlfbW9kZWwgPC0gZnVuY3Rpb24oZGYpIHsKICBsbShnZHBQZXJjYXAgfiB5ZWFyLCBkYXRhID0gZGYpCn0KCmdhcG1pbmRlci5tb2RlbCAgPC0gZ2FwbWluZGVyLm5lc3RlZCAlPiUgCiAgbXV0YXRlKG1vZGVsID0gbWFwKGRhdGEsIGNvdW50cnlfbW9kZWwpLCAKICAgICAgICAgY29lZiA9IG1hcChtb2RlbCwgYnJvb206OnRpZHkpKQpgYGAKCiMgIENhbGN1bGF0ZSBtZWFzdXJlcyBmb3IgY2hlY2tpbmcgbW9kZWwgcGVyZm9ybWFuY2UKCmBgYHtyfQpnYXBtaW5kZXIuY29lZiAgPC0gZ2FwbWluZGVyLm1vZGVsICU+JSB1bm5lc3QoY29lZikKZ2FwbWluZGVyLmNvZWYKYGBgCgpgYGB7cn0KZ2FwbWluZGVyLmNvZWYgJT4lIAogIG11dGF0ZSh0ZXJtID0gZmN0X3JlY29kZSh0ZXJtLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgSW50ZXJjZXB0ID0gIihJbnRlcmNlcHQpIiwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgIFNsb3BlID0gInllYXIiKSkgJT4lIAogIGdncGxvdChhZXMoZXN0aW1hdGUsIGZpbGwgPSB0ZXJtKSkgKyAKICBnZW9tX2RlbnNpdHkoc2hvdy5sZWdlbmQgPSBGQUxTRSwgYWxwaGEgPSAwLjUpICsgCiAgZ2VvbV9oaXN0b2dyYW0oY29sID0gImJsYWNrIiwgZmlsbCA9ICJsaWdodGdyZXkiLAogICAgICAgICAgICAgICAgIGFscGhhID0gMC41LAogICAgICAgICAgICAgICAgIGFlcyh5ID0gLi5kZW5zaXR5Li4pKSArIAogIGZhY2V0X3dyYXAofnRlcm0sIHNjYWxlcyA9ICJmcmVlIikgKyAKICBzY2FsZV9maWxsX2JyZXdlcihwYWxldHRlID0gIlNldDEiKSArIAogIHRoZW1lX21pbmltYWwoKSArIAogIGxhYnMoeSA9IE5VTEwsIHggPSAiRXN0aW1hdGUiKQpgYGAKCmBgYHtyfQpnbGFuY2VfZGYgPC0gZ2FwbWluZGVyLm1vZGVsICU+JQogIG11dGF0ZShnbGFuY2UgPSBtYXAobW9kZWwsIGJyb29tOjpnbGFuY2UpKSAlPiUKICB1bm5lc3QoZ2xhbmNlLCAuZHJvcCA9IFRSVUUpCgpnbGFuY2VfZGYKYGBgCgoKIyBRdWVzdGlvbnMKCiMjIFF1
ZXN0aW9uIDEKCmBgYHtyfQpnYXBtaW5kZXIubW9kZWwkbW9kZWxbWzFdXQpgYGAKCmBgYHtyfQpnbGFuY2VfZGYgJT4lCiAgc2VsZWN0KGNvdW50cnksIGNvbnRpbmVudCwgci5zcXVhcmVkKSAlPiUKICBncm91cF9ieShjb250aW5lbnQpICU+JQogIHN1bW1hcml6ZShyLnNxdWFyZWQ9bWVhbihyLnNxdWFyZWQpKSAlPiUKICBhcnJhbmdlKHIuc3F1YXJlZCkKYGBgCgojIyBRdWVzdGlvbiAyCgpgYGB7cn0KZ2xhbmNlX2RmICU+JQogIHNlbGVjdChjb3VudHJ5LCBjb250aW5lbnQsIHIuc3F1YXJlZCkgJT4lCiAgZmlsdGVyKHIuc3F1YXJlZCA8IDAuMjUpICU+JQogIGFycmFuZ2UoY291bnRyeSkKYGBgCgojIyBRdWVzdGlvbiAzCgpgYGB7cn0KZ2xhbmNlX2RmICU+JQogIHNlbGVjdChjb3VudHJ5LCBjb250aW5lbnQsIHIuc3F1YXJlZCkgJT4lCiAgZ3JvdXBfYnkoY29udGluZW50KSAlPiUKICBzdW1tYXJpemUoci5zcXVhcmVkPW1pbihyLnNxdWFyZWQpKSAlPiUKICBhcnJhbmdlKHIuc3F1YXJlZCkKYGBgCgojIyBRdWVzdGlvbiA0CgpgYGB7cn0KZ2xhbmNlX2RmICU+JQogIHNlbGVjdChjb3VudHJ5LCBjb250aW5lbnQsIHIuc3F1YXJlZCkgJT4lCiAgZmlsdGVyKGNvdW50cnkgPT0gJ0F1c3RyYWxpYScpCmBgYAoKIyMgUXVlc3Rpb24gNQoKYGBge3J9CmdsYW5jZV9kZiAlPiUgCiAgZ2dwbG90KGFlcyhjb250aW5lbnQsIHIuc3F1YXJlZCkpICsgCiAgZ2VvbV9qaXR0ZXIoYWVzKGNvbG9yPWNvbnRpbmVudCkpCmBgYAoK